* Generate a dataset with the variables that we want to collapse into a ballot-level analysis.
* (max): variables that either have a consistent numerical value across the ballot or are dummy indicators, so the ballot-level maximum keeps that value / flags the indicator.
* (sum): categorical indicators summed to show how many Latinos are on the ballot, plus Democratic votes and total votes for a new measure of Democratic partisan strength.

* Note: collapse clears the filename macro c(filename) so we will reload the original data set and recreate the collapsed data for each file that uses the collapsed data set.

* Collapse the data in memory to one row per value of ballots_2021.
*   (max)   keeps the largest value of each listed variable within the ballot.
*   (sum)   totals each listed variable within the ballot; entries written
*           new=old store the total of old under the name new.
*   (first) keeps the value from the first row of the ballot.
collapse ///
    (max) ///
        majpocpop prered2000 prered2010 postred2010 postred2000 ///
        ballot_mmd newmmd2010 newmmd2000 ///
        win_newlatm win_newlatw inc_win woman_win ///
        afampoppct asampoppct latpoppct whpoppct pocpoppct ///
        tlimit legprof demvoteshareballot_2020 distrepubstr ///
        openelect multimem allincfmt educcolldeg unemppct ///
        v05 v03 v07 v09 repind_win ///
        b_afamwwinner b_afammwinner b_asamwwinner b_asammwinner ///
        b_latwwinner b_latmwinner b_whwwinner b_whmwinner ///
        repind_pres stateyear first_winGRACE first_appearGRACE ///
    (sum) ///
        ballot_newlatwcount=new_latw ballot_newlatmcount=new_latm ///
        b_incwom=inc_bwomen b_incbwoc=inc_bwoc b_incbmoc=inc_bmoc ///
        b_inclatm=inc_blatm b_inclatw=inc_blatw b_incpoc=inc_poc b_inclat=inc_blat ///
        v22 d_votes ri_votes v23 candperseat=candidate_dummy ///
        ballot_latwcount=basket_latw ballot_latmcount=basket_latm ///
        ballot_womencount=basket_women ///
        b_latmD b_latwD b_latmR b_latwR ///
        basket_afamw basket_afamm newlatw2000 newlatm2000 ///
    (first) ///
        v02 ///
    , by(ballots_2021)

*** 0/1 presence indicators built from the ballot-level counts ***
* Each indicator is 1 when the corresponding count is not equal to 0 and 0
* otherwise. A logical expression evaluates to 1 (true) or 0 (false), so no
* separate zero-fill step is needed.

* Latina women / Latino men on the ballot
gen ballot_latwpres = (ballot_latwcount != 0)
gen ballot_latmpres = (ballot_latmcount != 0)

* either of the two counts above is nonzero
gen ballot_latallpres = (ballot_latwcount != 0 | ballot_latmcount != 0)

* same indicators for the "new" counts
gen ballot_newlatwpres = (ballot_newlatwcount != 0)
gen ballot_newlatmpres = (ballot_newlatmcount != 0)


* 0/1 indicators: the "new" count is nonzero AND a population share is at
* least .5. The *MMD variables use pocpoppct; the *LAT variables use latpoppct.
*
* Stata orders missing values above every number, so a bare `share >= .5` is
* TRUE when share is missing. The !missing() guard stops ballots with an
* unknown share from being flagged 1; they are coded 0 like every other
* ballot that does not meet the condition.
gen ballot_newlatwpresMMD = (ballot_newlatwcount != 0 & pocpoppct >= .5 & !missing(pocpoppct))
gen ballot_newlatmpresMMD = (ballot_newlatmcount != 0 & pocpoppct >= .5 & !missing(pocpoppct))

gen ballot_newlatwpresLAT = (ballot_newlatwcount != 0 & latpoppct >= .5 & !missing(latpoppct))
gen ballot_newlatmpresLAT = (ballot_newlatmcount != 0 & latpoppct >= .5 & !missing(latpoppct))


**** State-year incumbent composition ****
* stateyr_inc<grp>      = state-year total of the ballot-level count for <grp>
* stateyr_inc           = state-year total of v22 (the common denominator)
* stateyear_inc<grp>pct = stateyr_inc<grp> / stateyr_inc, with 0 substituted
*                         whenever the ratio is missing (e.g. zero denominator)
* egen ..., by() is used so the sort order of the data is left unchanged.

egen stateyr_incpoc = total(b_incpoc), by(stateyear)
egen stateyr_inc = total(v22), by(stateyear)
gen stateyear_incpocpct = cond(missing(stateyr_incpoc/stateyr_inc), 0, stateyr_incpoc/stateyr_inc)

egen stateyr_incwoc = total(b_incbwoc), by(stateyear)
gen stateyear_incwocpct = cond(missing(stateyr_incwoc/stateyr_inc), 0, stateyr_incwoc/stateyr_inc)

egen stateyr_incmoc = total(b_incbmoc), by(stateyear)
gen stateyear_incmocpct = cond(missing(stateyr_incmoc/stateyr_inc), 0, stateyr_incmoc/stateyr_inc)

egen stateyr_incwom = total(b_incwom), by(stateyear)
gen stateyear_incwompct = cond(missing(stateyr_incwom/stateyr_inc), 0, stateyr_incwom/stateyr_inc)

egen stateyr_inclat = total(b_inclat), by(stateyear)
gen stateyear_inclatpct = cond(missing(stateyr_inclat/stateyr_inc), 0, stateyr_inclat/stateyr_inc)

egen stateyr_inclatw = total(b_inclatw), by(stateyear)
gen stateyear_inclatwpct = cond(missing(stateyr_inclatw/stateyr_inc), 0, stateyr_inclatw/stateyr_inc)

egen stateyr_inclatm = total(b_inclatm), by(stateyear)
gen stateyear_inclatmpct = cond(missing(stateyr_inclatm/stateyr_inc), 0, stateyr_inclatm/stateyr_inc)

****how many districts are latino majority?*****
* latmaj is 1 when latpoppct is at least .500000001 and 0 otherwise.
* Stata orders missing values above every number, so without the !missing()
* guard a missing latpoppct would satisfy the >= test and be counted as a
* Latino-majority district; with the guard it is coded 0.
gen latmaj = (latpoppct >= .500000001 & !missing(latpoppct))


* Note, we do not generate inc_whm earlier in the file so this is commented out
// egen stateyr_incwhm = sum(b_incwhm), by (stateyear)
// gen stateyear_incwhmpct = (stateyr_incwhm/stateyr_inc)
// replace stateyear_incwhmpct=0 if stateyear_incwhmpct==.
// summarize stateyear_incwhmpct, detail


* ballot indicator of open seat mmd, and then a count of open seat mmds and all mmds in that state year
* ballot_openmmd is 1 when ballot_mmd==1 and openelect==1, and 0 otherwise.
* Written as one logical expression: the earlier zero-fill step tested the
* abbreviated name "ballot_openmm", which only resolved through Stata's
* variable-name abbreviation and breaks under -set varabbrev off-.
gen ballot_openmmd = (ballot_mmd == 1 & openelect == 1)

* state-year totals of the two ballot-level MMD indicators
egen stateyear_mmdballotcount = total(ballot_mmd), by(stateyear)

egen stateyear_openmmdballotcount = total(ballot_openmmd), by(stateyear)

* constant 1 per ballot row, so its state-year total is the number of ballots
gen ballot_dummy = 1

egen stateyear_allballots = total(ballot_dummy), by(stateyear)
egen stateyear_openallballots = total(openelect), by(stateyear)

***generating variables to indicate what proportion of all ballots in a state are MMD*******
gen mmdproportion = stateyear_mmdballotcount/stateyear_allballots
replace mmdproportion = 0 if missing(mmdproportion)

* open-seat version; a state-year with no open ballots has a zero denominator,
* which yields missing and is then set to 0
gen openmmdproportion = stateyear_openmmdballotcount/stateyear_openallballots
replace openmmdproportion = 0 if missing(openmmdproportion)
